#
# prepareData.r
#
# Prepare data to pass to JAGS.
#
# Hill, Seth J. and Chris Tausanovitch. "A Disconnect in Representation? Comparison of Trends in Congressional and Public Polarization."
#

library(foreign)

# Scaling configuration. `dims` multiplies the length of the start-value
# vector built later in this script (presumably the number of latent
# dimensions -- TODO confirm). `minyear` is the earliest survey year kept.
dims <- 1
minyear <- 1956

# Table of variables to use; its VarNumber column is matched against the
# column names of the survey data when subsetting.
vars <- read.csv("ANES_Cum_Vars_ToUse.csv")

# `run` is not defined in this file: it must already exist in the calling
# environment before the script is sourced. Each recognised value loads a
# different saved workspace image; any other value loads nothing, in which
# case anes.cdf.with.2012 must already be present for the script to continue.
if (run == "full") {
  load("anes_cdf_extra_50s60s.RData")
} else if (run == "social") {
  load("anes_cdf_extra_50s60s_social_only.RData")
  # The social-only run keeps a shorter time span.
  minyear <- 1980
} else if (run == "unglued") {
  load("anes_cdf_extra_50s60s_less_glue.RData")
}

# anes.cdf.with.2012 is expected to come from the image loaded above.
data <- anes.cdf.with.2012

# VCF0004 is the column used as each respondent's survey year.
year  <- data$VCF0004

# Subset down to only the variables we will use for scaling. The set of
# variable names depends on `run`:
#   "unglued" -> unglued.vars only
#   "social"  -> vars + extra.vars.50s.60s + additional.2012.items
#   otherwise -> vars + extra.vars.50s.60s
# Every source is passed through as.character() before being combined,
# because c() on a character vector and a factor yields the factor's integer
# codes, which would silently match no column of `data`.
if (run == "unglued") {
  scalevars <- as.character(unglued.vars$VarNumber)
} else {
  scalevars <- c(as.character(vars$VarNumber),
                 as.character(extra.vars.50s.60s$VariableNumber))
  if (run == "social") {
    scalevars <- c(scalevars, as.character(additional.2012.items$VarNumber))
  }
}

# Row filter shared by the data subset and by `party` / `year` below, so the
# three stay aligned row for row.
inperiod <- year >= minyear

# drop = FALSE keeps a data frame even if only one column is selected.
smalldata <- data[inperiod, names(data) %in% scalevars, drop = FALSE]

# Remove any questions which were not asked in the time period in question:
# a column with no observed value has a NaN mean under na.rm = TRUE.
asked <- !is.na(colMeans(smalldata, na.rm = TRUE))
smalldata <- smalldata[, asked, drop = FALSE]

# VCF0301 is the column used as the party code.
party <- data$VCF0301[inperiod]
year <- year[inperiod]

dims <- 1

# Generate start values based on party id, one per row of smalldata.
# They begin as NA and are filled in by party code below; a party value that
# is neither 1-7 nor missing is left as NA.
startvalues <- rep(NA, nrow(smalldata) * dims)

# Count the respondents whose party code equals x (missing codes never count).
numParty <- function(x) {
  sum(party == x, na.rm = TRUE)
}

# One (lower, upper) pair of uniform bounds per party code; the row number is
# the party code. Codes 1-3 start on the negative side, 5-7 on the positive
# side, and code 4 anywhere in between.
partyBounds <- rbind(
  c(-1, -0.5),
  c(-1, -0.5),
  c(-1, -0.5),
  c(-1, 1),
  c(0.5, 1),
  c(0.5, 1),
  c(0.5, 1)
)

# Draw code by code, in increasing order of code.
for (pcode in seq_len(nrow(partyBounds))) {
  startvalues[party %in% pcode] <- runif(numParty(pcode),
                                         partyBounds[pcode, 1],
                                         partyBounds[pcode, 2])
}

# Respondents with a missing party code get the full range.
startvalues[is.na(party)] <- runif(sum(is.na(party)), -1, 1)

### BLANKET RULE FOR NAS FOR NOW: 9s, 8s, and 0s are NA
# The rule is applied to every retained column alike, one code at a time.
# The comparison matrix is rebuilt on each pass so it reflects the values
# already blanked by the previous pass.
for (missingCode in c(9, 8, 0)) {
  smalldata[as.matrix(smalldata) == missingCode] <- NA
}

# prepareForGpc() is defined outside this file; its result is stored as jdata.
jdata <- prepareForGpc(smalldata, no.na = TRUE)

# Collapse the party code into three groups by lookup: codes 1-3 -> "D",
# codes 5-7 -> "R", code 4 -> "I". Anything that matches none of 1-7
# (including a missing code) falls through to "I" as well.
pid3 <- c("D", "D", "D", "I", "R", "R", "R")[match(party, 1:7)]
pid3[is.na(pid3)] <- "I"

# Label every respondent with a "<year>-<group>" cell and number the cells
# 1..K in sorted order of their labels.
yearpartyNames <- paste(year, pid3, sep = "-")
yearparty <- as.integer(factor(yearpartyNames))

# Because pid3 is character, cbind() produces a character matrix with
# columns year, pid3 and yearparty.
codes <- cbind(year, pid3, yearparty)

# Attach to jdata: the distinct (year, group, index) rows, the per-respondent
# cell index, and the number of distinct cells.
jdata$codes <- unique(codes)
jdata$yearparty <- yearparty
jdata$nyearparty <- length(unique(yearparty))

# Write jdata, year, party and startvalues to the output image for this run.
# An unrecognised `run` value saves nothing.
outfiles <- c(
  full    = "allyearsMultinomialFull.RData",
  social  = "allyearsMultinomialSocial.RData",
  unglued = "allyearsMultinomialUnglued.RData"
)
if (run %in% names(outfiles)) {
  save(list = c("jdata", "year", "party", "startvalues"),
       file = outfiles[[run]])
}

